#!/usr/bin/env python
# --*-- coding: UTF-8 --*--
#-*- encoding: utf-8 -*-

import sys, json;
from HTMLParser import HTMLParser
from htmlentitydefs import name2codepoint

class MailHTMLParser(HTMLParser):
    """Collect key/value pairs from the rows of an HTML table.

    For each table row, the text of the first ``<td>`` becomes a key and the
    text of the fourth ``<td>`` becomes its value; the pairs are stored in
    ``self.data``.  Comments, declarations and character/entity references
    are echoed to stdout.
    """

    # 1-based positions of the cells that hold the key and the value.
    KEY_COL = 1
    VALUE_COL = 4

    def __init__(self):
        # HTMLParser is an old-style class on Python 2, so call its
        # initializer directly rather than through super().
        HTMLParser.__init__(self)
        # Tracked for callers; the handlers in this class do not consult it.
        self.inTable = False
        self.inTd = False
        # Column currently being processed, 1-based; 0 means that no cell
        # of the current row has been opened yet.
        self.currCol = 0
        # Number of the target column (not read by any method of this class).
        self.targetCol = 0
        # Parsed result: key-cell text -> value-cell text.
        self.data = {}
        # Key read from the current row, waiting for its value cell.
        self.currKey = None

    @staticmethod
    def _toChar(codepoint):
        """Return the one-character text string for *codepoint*.

        Raises ValueError/OverflowError when the code point is out of range.
        """
        try:
            return unichr(codepoint)  # noqa: F821 - Python 2 builtin
        except NameError:
            # Interpreters without the ``unichr`` builtin provide ``chr``.
            return chr(codepoint)

    def handle_starttag(self, tag, attrs):
        """Track table state and count the ``<td>`` cells of the row."""
        if tag == 'table':
            self.inTable = True
        elif tag == 'tr':
            # HTML allows </tr> to be omitted; restarting the count when a
            # row opens keeps the columns aligned in that case too.
            self.currCol = 0
        elif tag == 'td':
            self.currCol += 1
            self.inTd = True

    def handle_endtag(self, tag):
        """Leave the table/cell state and reset the column count per row."""
        if tag == 'table':
            self.inTable = False
        elif tag == 'tr':
            self.currCol = 0
        elif tag == 'td':
            self.inTd = False

    def handle_data(self, data):
        """Record cell text: the key column sets the key, the value column
        stores ``data`` under that key."""
        if not self.inTd:
            return
        if self.currCol == self.KEY_COL:
            self.currKey = data
        elif self.currCol == self.VALUE_COL and self.currKey is not None:
            # Without a pending key (empty key cell, or a further text chunk
            # of a value that was already stored) there is nothing to file
            # the text under; skipping avoids a bogus ``None`` entry.
            self.data[self.currKey] = data
            self.currKey = None

    def handle_comment(self, data):
        """Echo an HTML comment to stdout."""
        print("Comment  : %s" % (data,))

    def handle_entityref(self, name):
        """Echo a named entity (``&name;``) to stdout as its character.

        Unknown entity names are echoed verbatim instead of raising KeyError.
        """
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            c = '&%s;' % (name,)
        else:
            c = self._toChar(codepoint)
        print("Named ent: %s" % (c,))

    def handle_charref(self, name):
        """Echo a numeric reference (``&#65;`` / ``&#x41;``) to stdout.

        Both ``x`` and ``X`` hex prefixes are accepted.  References that
        cannot be converted to a character are echoed verbatim.
        """
        try:
            if name[:1] in ('x', 'X'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            c = self._toChar(codepoint)
        except (ValueError, OverflowError):
            # Malformed digits or a code point outside the supported range.
            c = '&#%s;' % (name,)
        print("Num ent  : %s" % (c,))

    def handle_decl(self, data):
        """Echo a declaration (e.g. a doctype) to stdout."""
        print("Decl     : %s" % (data,))
